More information related to this lab can be found here.
Students in the 697 class need to complete the following challenges.
Print a graph to a png file using 3*ppi for the height and width and display the png file in the report using the above R Markdown format.
library(tidyverse)
## ── Attaching packages ──────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
time_series_confirmed <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
rename(Province_State = "Province/State", Country_Region = "Country/Region") %>%
mutate(report_type = "confirmed_cases") %>%
select(report_type, everything())
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_confirmed)
## # A tibble: 6 x 254
## report_type Province_State Country_Region Lat Long `1/22/20` `1/23/20`
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 confirmed_… <NA> Afghanistan 33.9 67.7 0 0
## 2 confirmed_… <NA> Albania 41.2 20.2 0 0
## 3 confirmed_… <NA> Algeria 28.0 1.66 0 0
## 4 confirmed_… <NA> Andorra 42.5 1.52 0 0
## 5 confirmed_… <NA> Angola -11.2 17.9 0 0
## 6 confirmed_… <NA> Antigua and B… 17.1 -61.8 0 0
## # … with 247 more variables: `1/24/20` <dbl>, `1/25/20` <dbl>, `1/26/20` <dbl>,
## # `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## # `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## # `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## # `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## # `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## # `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## # `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## # `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
## # `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
## # `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
## # `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
## # `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
## # `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
## # `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
## # `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
## # `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
## # `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
## # `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
## # `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
## # `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
## # `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
## # `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
## # `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
## # `4/28/20` <dbl>, `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>,
## # `5/2/20` <dbl>, …
library(tidyverse)
time_series_deaths <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")) %>%
rename(Province_State = "Province/State", Country_Region = "Country/Region") %>%
mutate(report_type = "confirmed_deaths") %>%
select(report_type, everything())
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_deaths)
## # A tibble: 6 x 254
## report_type Province_State Country_Region Lat Long `1/22/20` `1/23/20`
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 confirmed_… <NA> Afghanistan 33.9 67.7 0 0
## 2 confirmed_… <NA> Albania 41.2 20.2 0 0
## 3 confirmed_… <NA> Algeria 28.0 1.66 0 0
## 4 confirmed_… <NA> Andorra 42.5 1.52 0 0
## 5 confirmed_… <NA> Angola -11.2 17.9 0 0
## 6 confirmed_… <NA> Antigua and B… 17.1 -61.8 0 0
## # … with 247 more variables: `1/24/20` <dbl>, `1/25/20` <dbl>, `1/26/20` <dbl>,
## # `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## # `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## # `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## # `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## # `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## # `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## # `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## # `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
## # `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
## # `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
## # `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
## # `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
## # `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
## # `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
## # `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
## # `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
## # `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
## # `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
## # `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
## # `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
## # `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
## # `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
## # `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
## # `4/28/20` <dbl>, `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>,
## # `5/2/20` <dbl>, …
library(tidyverse)
time_series_joined <- bind_rows(time_series_confirmed, time_series_deaths) %>%
gather(key="date", value="count", 6:length(.)) %>%
mutate(date=lubridate::mdy(date)) %>%
spread(key="report_type", value="count") %>%
mutate(confirmed_cases=replace_na(confirmed_cases, 0),
confirmed_deaths=replace_na(confirmed_deaths, 0))
head(time_series_joined)
## # A tibble: 6 x 7
## Province_State Country_Region Lat Long date confirmed_cases
## <chr> <chr> <dbl> <dbl> <date> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 2020-01-22 0
## 2 <NA> Albania 41.2 20.2 2020-01-22 0
## 3 <NA> Algeria 28.0 1.66 2020-01-22 0
## 4 <NA> Andorra 42.5 1.52 2020-01-22 0
## 5 <NA> Angola -11.2 17.9 2020-01-22 0
## 6 <NA> Antigua and B… 17.1 -61.8 2020-01-22 0
## # … with 1 more variable: confirmed_deaths <dbl>
tail(time_series_joined)
## # A tibble: 6 x 7
## Province_State Country_Region Lat Long date confirmed_cases
## <chr> <chr> <dbl> <dbl> <date> <dbl>
## 1 <NA> Syria 34.8 39.0 2020-09-26 0
## 2 <NA> United Arab E… 23.4 53.8 2020-09-26 0
## 3 Montserrat United Kingdom 16.7 -62.2 2020-09-26 0
## 4 Turks and Cai… United Kingdom 21.7 -71.8 2020-09-26 0
## 5 <NA> Vietnam 14.1 108. 2020-09-26 0
## 6 <NA> Yemen 15.6 48.5 2020-09-26 0
## # … with 1 more variable: confirmed_deaths <dbl>
library(tidyverse)
time_series_sum <- time_series_joined %>%
group_by(date) %>%
summarize(cases = sum(confirmed_cases),
deaths = sum(confirmed_deaths)) %>%
mutate(deaths_cases = deaths/cases)
head(time_series_sum)
## # A tibble: 6 x 4
## date cases deaths deaths_cases
## <date> <dbl> <dbl> <dbl>
## 1 2020-01-22 555 17 0.0306
## 2 2020-01-23 654 18 0.0275
## 3 2020-01-24 941 26 0.0276
## 4 2020-01-25 1434 42 0.0293
## 5 2020-01-26 2118 56 0.0264
## 6 2020-01-27 2927 82 0.0280
library(tidyverse)
time_series_sum <- time_series_joined %>%
filter(Country_Region=="US") %>% #| Filter only US Data
group_by(date) %>%
summarize(cases = sum(confirmed_cases),
deaths = sum(confirmed_deaths)) %>%
mutate(deaths_cases = deaths/cases)
ppi=300
library(ggplot2)
p1 <- ggplot(time_series_sum, aes(x=date, y=deaths_cases)) +
geom_line() +
labs(x="", y="Deaths/Cases", title="Fatality Rate in the U.S.") +
theme_linedraw() +
theme(plot.title=element_text(face="bold", hjust=0.5),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank())
png("/Users/greg/Box/courses/umass/fall-2020/evolutionary-genomics-and-bioinformatics/evo-geno-course/images/fatality-rate-in-the-usa-plot.png",
width=3*ppi, height=3*ppi, res=ppi)
p1
dev.off()
## quartz_off_screen
## 2
Turn one of the exercises from Lab 5 into an interactive graph with plotyly.
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p1 <- ggplot(time_series_sum, aes(x=date, y=deaths_cases)) +
geom_line() +
labs(x="", y="Deaths/Cases", title="Fatality Rate in the U.S.") +
theme_linedraw() +
theme(plot.title=element_text(face="bold", hjust=0.5),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank())
ggplotly(p1)
Create an animated graph of your choosing using the time series data to display an aspect (e.g. states or countries) of the data that is important to you.
library(tidyverse)
time_series_confirmed <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")) %>%
select(7:length(.)) %>%
gather(key="date", value="cases", 6:length(.)) %>%
mutate(date = lubridate::mdy(date)) %>%
group_by(date, Province_State) %>%
summarize(total_cases = sum(cases)) %>%
filter(Province_State %in% c("Pennsylvania", "Washington", "Massachusetts"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## iso2 = col_character(),
## iso3 = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_confirmed)
## # A tibble: 6 x 3
## # Groups: date [2]
## date Province_State total_cases
## <date> <chr> <dbl>
## 1 2020-01-22 Massachusetts 0
## 2 2020-01-22 Pennsylvania 0
## 3 2020-01-22 Washington 1
## 4 2020-01-23 Massachusetts 0
## 5 2020-01-23 Pennsylvania 0
## 6 2020-01-23 Washington 1
library(ggplot2)
library(gganimate)
library(transformr)
library(gifski)
p1 <- ggplot(time_series_confirmed, aes(x=date, y=total_cases)) +
labs(x="", y="confirmed cases", title="COVID-19 Cases by State") +
geom_line(aes(color=Province_State)) +
theme_linedraw() +
theme(plot.title=element_text(face="bold", hjust=0.5),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position="bottom",
legend.title=element_blank()) +
transition_reveal(date)
animate(p1, renderer = gifski_renderer(), end_pause = 15)